import pandas as pd
from Bio import SeqIO

def fasta2seq(file_name):
    """Return the sequences stored in a FASTA file as plain strings.

    Args:
        file_name: Path of the FASTA file to read.

    Returns:
        A list with one ``str`` per record, in the order the parser
        yields them. Record identifiers and descriptions are discarded.
    """
    # The context manager guarantees the handle is closed even if parsing
    # fails; the list is built inside the block because SeqIO.parse is lazy.
    with open(file_name) as handle:
        # Use the public ``seq`` attribute instead of reaching into the
        # record's private ``__dict__['_seq']`` storage.
        return [str(entry.seq) for entry in SeqIO.parse(handle, 'fasta')]

def get_data(file_name):
    """Load the sequences of a FASTA file into a one-column DataFrame.

    Args:
        file_name: Path of the FASTA file, forwarded to ``fasta2seq``.

    Returns:
        A ``pandas.DataFrame`` whose single column ``'seq'`` holds the
        values returned by ``fasta2seq``, one row per value.
    """
    sequences = fasta2seq(file_name)
    frame = pd.DataFrame(data=sequences, columns=['seq'])
    return frame

def filter_standard_amino_acid(data):
    """Split sequences by whether they contain a non-standard residue code.

    A sequence is considered invalid when it contains at least one of the
    letters ``B``, ``J``, ``O``, ``U``, ``X`` or ``Z``. Matching is
    case-sensitive: only the upper-case letters are checked.

    Args:
        data: Any object where ``data['seq']`` yields the sequences to
            check (for example the DataFrame built by ``get_data``).

    Returns:
        A ``(valid_seq, invalid_seq)`` tuple of lists. Each list keeps the
        input order; an empty sequence is reported as valid.
    """
    # Built once up front instead of re-creating the set for every sequence.
    non_standard_aa = frozenset('BJOUXZ')

    valid_seq, invalid_seq = [], []
    for seq in data['seq']:
        # isdisjoint stops at the first offending character and avoids
        # materialising a set of the whole sequence.
        if non_standard_aa.isdisjoint(seq):
            valid_seq.append(seq)
        else:
            invalid_seq.append(seq)

    return valid_seq, invalid_seq